* Session setup: start with no data in memory.
clear
* Raise Stata's matrix-size limit to 5000.
set matsize 5000
* Working directory; every file name used below is a relative path resolved from here.
cd <directory>
**********************************************************************************************************************
** Takes the cleaned weather data (from weatherdata_clean.do) and the cleaned attendance data (from attendance_clean.do) and merges them all together with salary data.
**********************************************************************************************************************


* ---------------------------------------------------------------------------
* Build a worker -> production-line mapping from the attendance data.
* Output: one row per (unit, tokenno) with the worker's initial BATCH and a
* line identifier rno = group(unit BATCH).
* ---------------------------------------------------------------------------
use "attendance_merged_tamu_keepingallvars", clear

* Convert BATCH to a line number (logic taken from linelevelattendance_tamudata.do).
* The force option turns non-numeric BATCH values into missing; those rows are dropped.
destring BATCH, replace force
keep if BATCH != .

* About 1% of workers switch batches in this data - assign them to their initial batch,
* i.e. the batch recorded on the worker's earliest attendance date.
bysort unit tokenno: egen mindate = min(date)
count if mindate == .

* BATCH on the first date only (missing on every other row) ...
generate minBATCH = cond(date == mindate, BATCH, .)
* ... then spread it to all rows of the worker. If a worker has several rows on the
* first date, this is the mean of their BATCH values.
bysort unit tokenno: egen mBATCH = mean(minBATCH)
count if mBATCH == .
tabulate mBATCH

* Reduce to one row per worker and keep only the mapping variables.
duplicates drop unit tokenno, force
keep unit tokenno mBATCH
rename mBATCH BATCH

* This measure of rno is taken from "linelevelattendance_tamudata.do".
egen rno = group(unit BATCH)
save "linemapping_tokenno_fromattendancedata", replace

* ---------------------------------------------------------------------------
* Collapse the weather file to one row per location (longitude, latitude) and
* calendar month: total precipitation, monthly means of every weather
* measure, monthly maxima of the "max" measures and monthly minima of the
* "min" measures.
* ---------------------------------------------------------------------------
use "weather_2010to2014_allweeklags.dta", clear

collapse ///
    (sum) totprec=precipitation ///
    (mean) precipitation ///
        mintemperature rhomintemperature wbgtmintemperature himintemp ///
        maxtemperature rhomaxtemperature wbgtmaxtemperature himaxtemp ///
        rhomeant wbgtmeant meant himeant ///
    (max) maxmaxtemperature=maxtemperature ///
        maxrhomaxtemperature=rhomaxtemperature ///
        maxwbgtmaxtemperature=wbgtmaxtemperature ///
        maxhimaxtemp=himaxtemp ///
    (min) minmintemperature=mintemperature ///
        minrhomintemperature=rhomintemperature ///
        minwbgtmintemperature=wbgtmintemperature ///
        minhimintemp=himintemp, ///
    by(longitude latitude year month)

save "weather_2010to2014_monthly", replace

* ---------------------------------------------------------------------------
* Count, per location and calendar month, the number of observations whose
* temperature measure is at or above a threshold (27 for raw temperature,
* 26 for the hi* measures, 19 for the wbgt* measures).
*
* Stata treats missing values as larger than any number, so a bare
* "x >= 27" evaluates to 1 when x is missing. Each indicator is therefore
* restricted to non-missing readings; observations with a missing reading get
* a missing indicator, which collapse (sum) ignores, so they no longer count
* as threshold exceedances.
* ---------------------------------------------------------------------------
use "weather_2010to2014_allweeklags.dta", clear

* Mean-temperature measures
g meantbin=meant>=27 if !missing(meant)
g himeantbin=himeant>=26 if !missing(himeant)
g wbgtmeantbin=wbgtmeant>=19 if !missing(wbgtmeant)

* Minimum-temperature measures
g mintemperaturebin=mintemperature>=27 if !missing(mintemperature)
g himintempbin=himintemp>=26 if !missing(himintemp)
g wbgtmintempbin=wbgtmintemperature>=19 if !missing(wbgtmintemperature)

* Maximum-temperature measures
g maxtemperaturebin=maxtemperature>=27 if !missing(maxtemperature)
g himaxtempbin=himaxtemp>=26 if !missing(himaxtemp)
g wbgtmaxtempbin=wbgtmaxtemperature>=19 if !missing(wbgtmaxtemperature)

* Monthly totals of every indicator (all variables whose name ends in "bin").
collapse (sum) *bin, by(longitude latitude year month)
save "weather_2010to2014_monthlybinaries", replace

* ---------------------------------------------------------------------------
* Two lookup files derived from the line-level file "LEDprojecwithtamudatanew.dta".
* ---------------------------------------------------------------------------

* (1) One row per unit holding monthled and yearled.
*     Where a unit has several rows, the first one in file order is kept.
use "LEDprojecwithtamudatanew.dta", clear
keep unit monthled yearled
duplicates drop unit, force
* Diagnostic: number of units without a monthled value.
count if monthled == .
save "monthandyearledfromlineleveldata", replace

* (2) One row per unit-year-month in which mactualeff is not missing.
use "LEDprojecwithtamudatanew.dta", clear
drop if mactualeff == .
keep unit year month
duplicates drop unit year month, force
save "monthsoflineleveefficiencydata", replace


* ---------------------------------------------------------------------------
* Stack the monthly salary files sal_<mon>_<year>.dta for 2010-2013 into one
* dataset, tagging every row with the month name (month1) and year of the
* file it came from.
*
* Freshly appended rows are identified by an empty month1 / missing year, so
* the tags are filled in immediately after each append. The condition refers
* to month1 by its full name: the abbreviation "month" only works while
* variable abbreviation is on and no appended file carries another variable
* whose name starts with "month".
* ---------------------------------------------------------------------------
clear
g month1=""
g year=.

forvalues yr = 2010/2013 {
    foreach mon in jan feb mar apr may jun jul aug sep oct nov dec {
        append using "sal_`mon'_`yr'.dta"
        replace month1 = "`mon'" if month1 == ""
        replace year = `yr' if year == .
    }
}

* Sanity check: every row should now carry a year (expect 0).
count if year == .

* ---------------------------------------------------------------------------
* Attach each unit's LED month/year and build the LED indicator.
* ---------------------------------------------------------------------------

* Copy the salary-file identifiers into the key names used by the other files.
g unit = factory
g tokenno = tknno

* Keep only salary rows whose unit appears in the LED file.
* _merge is left in the data (equal to 3 on every remaining row).
merge m:1 unit using "monthandyearledfromlineleveldata", keep(match)

* Numeric calendar month from the three-letter month name.
g month = .
local k = 0
foreach mon in jan feb mar apr may jun jul aug sep oct nov dec {
    local ++k
    replace month = `k' if month1 == "`mon'"
}

* Put the salary month and the LED month on the same scale, year + month/12.
g monthy = year + (month/12)
destring yearled monthled, replace
g monthyled = yearled + (monthled/12)

* led = 1 when the salary month is at or after the LED month.
* NOTE(review): Stata orders missing above every number, so a unit with a
* missing LED date gets led = 0 and a row with a missing monthy would get
* led = 1 - confirm this is the intended treatment.
g led = (monthy >= monthyled)

* Share of each unit's rows with led = 1.
bysort unit: egen unitled = mean(led)



* ---------------------------------------------------------------------------
* Merge in unit-level data, monthly weather, efficiency-data availability and
* the worker -> line mapping.
* ---------------------------------------------------------------------------

* Unit-level file; matched rows only.
* Presumably this supplies longitude/latitude for the weather merges - TODO confirm.
merge m:1 unit using "tamu_allunitsmerge", keep(match) gen(mm)
drop mm

* Monthly weather aggregates and monthly threshold counts; matched rows only.
foreach wfile in weather_2010to2014_monthly weather_2010to2014_monthlybinaries {
    merge m:1 longitude latitude year month using "`wfile'", keep(match) gen(mmm)
    drop mmm
}

* Flag unit-months for which line-level efficiency data exist.
* NOTE(review): unmatched rows from the using file are kept here (with
* efficiencydataavailable = 0) - confirm that is intended.
merge m:1 unit year month using "monthsoflineleveefficiencydata", gen(m1)
g efficiencydataavailable = (m1 == 3)
drop m1

* Worker -> line mapping; the merge indicator mm stays in the data.
* NOTE(review): unmatched rows from the using file are kept here as well.
merge m:1 unit tokenno using "linemapping_tokenno_fromattendancedata", gen(mm)

* ---------------------------------------------------------------------------
* Interact the mean-temperature measures and every *bin count with the LED
* indicator, then save the merged monthly salary file.
* ---------------------------------------------------------------------------

* <name>led = <name> * led, left missing whenever either factor is missing.
foreach w of varlist meant himeant wbgtmeant *bin {
    g `w'led = `w' * led if !missing(`w', led)
}

* Share of each unit's rows with led = 1 in the final sample; list the units
* for which it is 0.
bysort unit: egen mled = mean(led)
tabulate unit if mled == 0

save "monthlysalary_merged", replace
